CFLAGS ?= -O2
PICFLAG = -fPIC
C99FLAG = -std=c99
-WCFLAGS = -Wall -Wextra -pedantic
+WCFLAGS = -Wsign-conversion -Wall -Wextra -pedantic
UCFLAGS = $(CPPFLAGS) $(CFLAGS) $(PICFLAG) $(C99FLAG) $(WCFLAGS) -DUTF8PROC_EXPORTS $(UTF8PROC_DEFINES)
LDFLAG_SHARED = -shared
SOFLAG = -Wl,-soname
++error;
}
- if (sizeof(wint_t) > 2 || c < (1<<16)) {
- wint_t l0 = towlower(c), u0 = towupper(c);
+ if (sizeof(wint_t) > 2 || (c < (1<<16) && u < (1<<16) && l < (1<<16))) {
+ wint_t l0 = towlower((wint_t)c), u0 = towupper((wint_t)c);
/* OS unicode tables may be out of date. But if they
do have a lower/uppercase mapping, hopefully it
is correct? */
- if (l0 != c && l0 != l) {
+ if (l0 != (wint_t)c && l0 != (wint_t)l) {
fprintf(stderr, "MISMATCH %x != towlower(%x) == %x\n",
l, c, l0);
++error;
}
- else if (l0 != l) { /* often true for out-of-date OS unicode */
+ else if (l0 != (wint_t)l) { /* often true for out-of-date OS unicode */
++better;
/* printf("%x != towlower(%x) == %x\n", l, c, l0); */
}
- if (u0 != c && u0 != u) {
+ if (u0 != (wint_t)c && u0 != (wint_t)u) {
fprintf(stderr, "MISMATCH %x != towupper(%x) == %x\n",
u, c, u0);
++error;
}
- else if (u0 != u) { /* often true for out-of-date OS unicode */
+ else if (u0 != (wint_t)u) { /* often true for out-of-date OS unicode */
++better;
/* printf("%x != towupper(%x) == %x\n", u, c, u0); */
}
else
i++;
}
- glen = utf8proc_map(utf8, j, &g, UTF8PROC_CHARBOUND);
+ glen = utf8proc_map(utf8, (utf8proc_ssize_t)j, &g, UTF8PROC_CHARBOUND);
if (glen == UTF8PROC_ERROR_INVALIDUTF8) {
/* the test file contains surrogate codepoints, which are only for UTF-16 */
printf("line %zd: ignoring invalid UTF-8 codepoints\n", lineno);
utf8proc_bool expectbreak = false;
do {
utf8proc_int32_t codepoint;
- i += utf8proc_iterate(src + i, si - i, &codepoint);
+ i += (size_t)utf8proc_iterate(src + i, (utf8proc_ssize_t)(si - i), &codepoint);
check(codepoint >= 0, "invalid UTF-8 data");
if (codepoint == 0x002F)
expectbreak = true;
utf8proc_uint8_t *g;
glen = utf8proc_map(input, 6, &g, UTF8PROC_CHARBOUND);
check(!strcmp((char*)g, (char*)output), "mishandled u+ffff and u+fffe grapheme breaks");
+ check(glen != 6, "mishandled u+ffff and u+fffe grapheme breaks");
free(g);
};
/* Test helpers: each stores val into buf[pos] and then calls testbytes() on
 * the first len bytes of buf, tagging the call with the current __LINE__.
 * CHECKVALID passes len as the expected return value; CHECKINVALID passes
 * UTF8PROC_ERROR_INVALIDUTF8. Both rely on a variable named buf at the
 * expansion site.
 * NOTE(review): each macro expands to two separate statements with no
 * do { } while (0) wrapper, so it must not be used as the body of an
 * unbraced if/else/loop. */
#define CHECKVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,len,__LINE__)
#define CHECKINVALID(pos, val, len) buf[pos] = val; testbytes(buf,len,UTF8PROC_ERROR_INVALIDUTF8,__LINE__)
-static void testbytes(unsigned char *buf, int len, utf8proc_ssize_t retval, int line)
+static void testbytes(utf8proc_uint8_t *buf, utf8proc_ssize_t len, utf8proc_ssize_t retval, int line)
{
utf8proc_int32_t out[16];
utf8proc_ssize_t ret;
/* Make a copy to ensure that memory is left uninitialized after "len"
* bytes. This way, Valgrind can detect overreads.
*/
- unsigned char tmp[16];
- memcpy(tmp, buf, len);
+ utf8proc_uint8_t tmp[16];
+ memcpy(tmp, buf, (unsigned long int)len);
tests++;
if ((ret = utf8proc_iterate(tmp, len, out)) != retval) {
fprintf(stderr, "Failed (%d):", line);
- for (int i = 0; i < len ; i++) {
+ for (utf8proc_ssize_t i = 0; i < len ; i++) {
fprintf(stderr, " 0x%02x", tmp[i]);
}
fprintf(stderr, " -> %zd\n", ret);
int main(int argc, char **argv)
{
- uint32_t byt;
- unsigned char buf[16];
+ utf8proc_int32_t byt;
+ utf8proc_uint8_t buf[16];
(void) argc; (void) argv; /* unused */
for (i = 1; i < argc; ++i) {
utf8proc_uint8_t cstr[16], *map;
- unsigned int c;
+ utf8proc_uint32_t x;
+ utf8proc_int32_t c;
if (!strcmp(argv[i], "-V")) {
printf("utf8proc version %s\n", utf8proc_version());
continue;
}
- check(sscanf(argv[i],"%x",&c) == 1, "invalid hex input %s", argv[i]);
+ check(sscanf(argv[i],"%x", &x) == 1, "invalid hex input %s", argv[i]);
+ c = (utf8proc_int32_t)x;
const utf8proc_property_t *p = utf8proc_get_property(c);
if (utf8proc_codepoint_valid(c))
in dest, returning the number of bytes read from buf */
size_t encode(unsigned char *dest, const unsigned char *buf)
{
- size_t i = 0, j, d = 0;
+ size_t i = 0, j;
+ utf8proc_ssize_t d = 0;
for (;;) {
int c;
i = skipspaces(buf, i);
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(
const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_int32_t *dst
) {
- utf8proc_uint32_t uc;
+ utf8proc_int32_t uc;
const utf8proc_uint8_t *end;
*dst = -1;
return 1;
}
// Must be between 0xc2 and 0xf4 inclusive to be valid
- if ((uc - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
+ if ((utf8proc_uint32_t)(uc - 0xc2) > (0xf4-0xc2)) return UTF8PROC_ERROR_INVALIDUTF8;
if (uc < 0xe0) { // 2-byte sequence
// Must have valid continuation character
if (str >= end || !utf_cont(*str)) return UTF8PROC_ERROR_INVALIDUTF8;
/* Lowercase mapping of codepoint c. Reads lowercase_seqindex from the
 * property record returned by utf8proc_get_property(c); if that value is
 * UINT16_MAX, c is returned unchanged, otherwise the result of
 * seqindex_decode_index() on it is returned.
 * NOTE(review): the changed line only adds an explicit cast of the index to
 * utf8proc_uint32_t - presumably to satisfy -Wsign-conversion; confirm
 * against the parameter type of seqindex_decode_index. */
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_tolower(utf8proc_int32_t c)
{
utf8proc_int32_t cl = utf8proc_get_property(c)->lowercase_seqindex;
- return cl != UINT16_MAX ? seqindex_decode_index(cl) : c;
+ return cl != UINT16_MAX ? seqindex_decode_index((utf8proc_uint32_t)cl) : c;
}
/* Uppercase mapping of codepoint c. Reads uppercase_seqindex from the
 * property record returned by utf8proc_get_property(c); if that value is
 * UINT16_MAX, c is returned unchanged, otherwise the result of
 * seqindex_decode_index() on it is returned.
 * NOTE(review): the changed line only adds an explicit cast of the index to
 * utf8proc_uint32_t - presumably to satisfy -Wsign-conversion; confirm
 * against the parameter type of seqindex_decode_index. */
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_toupper(utf8proc_int32_t c)
{
utf8proc_int32_t cu = utf8proc_get_property(c)->uppercase_seqindex;
- return cu != UINT16_MAX ? seqindex_decode_index(cu) : c;
+ return cu != UINT16_MAX ? seqindex_decode_index((utf8proc_uint32_t)cu) : c;
}
/* Titlecase mapping of codepoint c. Reads titlecase_seqindex from the
 * property record returned by utf8proc_get_property(c); if that value is
 * UINT16_MAX, c is returned unchanged, otherwise the result of
 * seqindex_decode_index() on it is returned.
 * NOTE(review): the changed line only adds an explicit cast of the index to
 * utf8proc_uint32_t - presumably to satisfy -Wsign-conversion; confirm
 * against the parameter type of seqindex_decode_index. */
UTF8PROC_DLLEXPORT utf8proc_int32_t utf8proc_totitle(utf8proc_int32_t c)
{
utf8proc_int32_t cu = utf8proc_get_property(c)->titlecase_seqindex;
- return cu != UINT16_MAX ? seqindex_decode_index(cu) : c;
+ return cu != UINT16_MAX ? seqindex_decode_index((utf8proc_uint32_t)cu) : c;
}
UTF8PROC_DLLEXPORT int utf8proc_islower(utf8proc_int32_t c)
/* Expands to a return statement that re-invokes utf8proc_decompose_char()
 * on replacement_uc with the UTF8PROC_LUMP bit cleared from options. The
 * expansion uses dst, bufsize, options and last_boundclass from the
 * enclosing scope and supplies no trailing semicolon.
 * NOTE(review): the changed line casts UTF8PROC_LUMP to unsigned int before
 * the bitwise complement - presumably so the mask is computed in an unsigned
 * type for -Wsign-conversion; confirm against utf8proc_option_t. */
#define utf8proc_decompose_lump(replacement_uc) \
return utf8proc_decompose_char((replacement_uc), dst, bufsize, \
- options & ~UTF8PROC_LUMP, last_boundclass)
+ options & ~(unsigned int)UTF8PROC_LUMP, last_boundclass)
UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(utf8proc_int32_t uc, utf8proc_int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) {
const utf8proc_property_t *property;
*dstptr = NULL;
result = utf8proc_decompose_custom(str, strlen, NULL, 0, options, custom_func, custom_data);
if (result < 0) return result;
- buffer = (utf8proc_int32_t *) malloc(result * sizeof(utf8proc_int32_t) + 1);
+ buffer = (utf8proc_int32_t *) malloc(((utf8proc_size_t)result) * sizeof(utf8proc_int32_t) + 1);
if (!buffer) return UTF8PROC_ERROR_NOMEM;
result = utf8proc_decompose_custom(str, strlen, buffer, result, options, custom_func, custom_data);
if (result < 0) {